library(stringr)
library(feather)
library(koRpus)
library(tidyverse)
library(lme4)
library(dplyr)
library(sjPlot)
library(corrplot)
library(tidytext)
library(tm)
library(childesr)
# Load data
# CHILDES measures, restricted to 14-58 months so that the age range is
# aligned with that of LDP.
# NOTE(review): the directory is spelled "triaL6_childes" (capital L) --
# confirm it matches the folder on disk.
childes_all <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/triaL6_childes/childes_all.feather"
) %>%
  filter(age >= 14, age <= 58)

# LDP measures, read as stored (no age filter is applied here).
ldp_all <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/ldp_all.feather"
)
# Plot Growth Curve
# CHILDES: smoothed age trajectories of the child measures. Every measure is
# wrapped in scale() so that indices with different raw units share one
# y axis. The constant strings mapped to `color` only act as legend labels.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mlu), color = "mlu"), se = FALSE) +
  theme_classic() +
  labs(
    title = "CHILDES: Growth Curve of Lexical Diversity",
    subtitle = "14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# facet by UK/US group
# Same CHILDES growth curves as above, drawn in one panel per value of
# `group`.
# NOTE(review): scale() inside aes() appears to standardise over the pooled
# data rather than within each panel -- confirm that is the intended
# reference for the z-scores.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mlu), color = "mlu"), se = FALSE) +
  facet_grid(~ group) +
  theme_classic() +
  labs(
    title = "CHILDES: Growth Curve of Lexical Diversity",
    subtitle = "14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# compare with CDI
# LDP: child lexical-diversity indices against CDI, each z-scored with
# scale(). Grouping by subject is unnecessary here: no layer maps a grouping
# variable, so the data are passed to ggplot() as they are.
# NOTE(review): xlim() starts at 14 months but the subtitle says 18 --
# confirm which lower bound is intended.
# NOTE(review): scale() is evaluated on the whole column while xlim() only
# restricts what is smoothed and drawn, so the z-scores are presumably
# relative to the full age range rather than to this window -- confirm.
ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(cdi), color = "CDI"), se = FALSE) +
  xlim(14, 30) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with CDI",
    subtitle = "LDP: 18 ~ 30 Months",
    y = "lexical diversity (scaled)"
  )
# compare with PPVT
# LDP: child lexical-diversity indices against PPVT, each z-scored with
# scale(). Grouping by subject is unnecessary here: no layer maps a grouping
# variable, so the data are passed to ggplot() as they are.
# NOTE(review): scale() is evaluated on the whole column while xlim() only
# restricts what is smoothed and drawn, so the z-scores are presumably
# relative to the full age range rather than to this window -- confirm.
ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(ppvt), color = "PPVT"), se = FALSE) +
  xlim(30, 53) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with PPVT",
    subtitle = "LDP: 30 ~ 53 Months",
    y = "lexical diversity (scaled)"
  )
# TTR vs MATTR
# Scaled child TTR and MATTR against age, first for CHILDES, then for LDP.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Growth Curve by TTR & MATTR",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )

ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by TTR & MATTR",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# MTLD vs vocd-D (Kid)
# Child vocd-D and MTLD against age, first for CHILDES, then for LDP.
# Unlike the plots that wrap each measure in scale(), these curves use the
# raw values, so the y-axis label says "raw score" instead of "scaled".
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )
# MTLD vs vocd-D (Mother)
# Mother vocd-D and MTLD against the child's age, first for CHILDES, then
# for LDP. The curves use the raw values (no scale()), so the y-axis label
# says "raw score" instead of "scaled".
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )
# Get parameters of children and mothers in LDP data
# Get parameters of children and mothers in CHILDES data
# Tables with the *_intercept and *_slope columns used by the summaries,
# histograms and correlation plots further down.
# NOTE(review): the CHILDES table is read from the "trial5_ldp" directory --
# confirm the path is intended.
childes_intercept <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/childes_intercept.feather"
)
ldp_intercept <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/ldp_intercept.feather"
)
# variance of children's intercept
# LDP: stack the child intercept columns into long form (measure / value)
# and report, per measure, the mean, the sd and their ratio (coefficient of
# variation). NAs are ignored when computing the mean and the sd.
ldp_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 8 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_intercept 497.5050370 152.25178818 0.30603065
## 2 mattr_intercept 0.4157072 0.04125706 0.09924548
## 3 mlu_intercept 2.3530655 0.35607149 0.15132239
## 4 mtld_intercept 12.6276056 2.44016389 0.19324043
## 5 ppvt_intercept 27.4537313 10.91072920 0.39742245
## 6 sen_intercept 19.2577319 11.35482955 0.58962445
## 7 ttr_intercept 0.1934905 0.02762355 0.14276435
## 8 vocd_intercept 29.1841902 1.77017781 0.06065537

# CHILDES: same summary for the five child intercepts available there.
childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_intercept 0.5575220 0.04228984 0.07585322
## 2 mlu_intercept 3.4293445 0.70482848 0.20552863
## 3 mtld_intercept 15.8337543 3.27785922 0.20701718
## 4 ttr_intercept 0.2347557 0.10931233 0.46564283
## 5 vocd_intercept 30.6584564 2.19072708 0.07145588
# Histograms of the children's intercepts, one panel per measure, each with
# its own x range (LDP first, then CHILDES). The panels show the raw
# intercept values, so both plots carry the same "Variance of ..." title.
ldp_intercept %>%
  gather(
    key = "measure", value = "value",
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# variance of children's slope
# LDP: per-measure mean, sd and sd / mean of the child slope columns.
# A negative mean (see ttr_slope in the output) makes coef_of_var negative.
ldp_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope, sen_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 8 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_slope 836.30454069 131.54764297 0.15729634
## 2 mattr_slope 0.22790449 0.05044707 0.22135179
## 3 mlu_slope 2.34837035 0.22141643 0.09428514
## 4 mtld_slope 18.03235698 2.86834299 0.15906645
## 5 ppvt_slope 77.49354350 18.14881161 0.23419773
## 6 sen_slope 39.04148776 19.12625515 0.48989565
## 7 ttr_slope -0.02842861 0.04605768 -1.62011720
## 8 vocd_slope 10.33666075 2.91275673 0.28178895

# CHILDES: same summary for the five child slopes available there.
childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mtld_slope, mattr_slope, vocd_slope, ttr_slope, mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_slope 0.19600032 0.09697797 0.49478477
## 2 mlu_slope 1.22627619 0.09686533 0.07899144
## 3 mtld_slope 18.97232656 4.91981046 0.25931508
## 4 ttr_slope -0.08196691 0.09686533 -1.18176134
## 5 vocd_slope 7.46758510 6.60336456 0.88427041
# Histograms of the children's slopes, one panel per measure, each with its
# own x range (LDP first, then CHILDES).
ldp_intercept %>%
  gather(
    key = "measure", value = "value",
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope, sen_slope
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mtld_slope, mattr_slope, vocd_slope, ttr_slope, mlu_slope
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# variance of mother's intercept
# LDP: per-measure mean, sd and sd / mean of the mother intercept columns.
ldp_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.5634104 0.02346517 0.04164845
## 2 mom_mlu_intercept 4.1064351 0.40644597 0.09897782
## 3 mom_mtld_intercept 31.4707075 5.16404671 0.16409058
## 4 mom_ttr_intercept 0.1588064 0.04021612 0.25323995
## 5 mom_vocd_intercept 34.2712969 0.54257524 0.01583177

# CHILDES: same summary for the mother intercepts.
childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept, mom_mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  mutate(coef_of_var = sd / mean)
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.6784563 0.0250928 0.03698514
## 2 mom_mlu_intercept 4.2069000 0.8785460 0.20883453
## 3 mom_mtld_intercept 34.0199253 5.2203424 0.15344956
## 4 mom_ttr_intercept 0.2655001 0.1355026 0.51036743
## 5 mom_vocd_intercept 32.9539727 0.3893581 0.01181521
# Histograms of the mothers' intercepts, one panel per measure, each with
# its own x range (LDP first, then CHILDES).
ldp_intercept %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept, mom_mlu_intercept
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# variance of mother's slope
# LDP: per-measure mean, sd and sd / mean of the mother slope columns.
# The MLU entry must be mom_mlu_slope; listing mom_mlu_intercept here would
# mix an intercept into the slope table.
ldp_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
# NOTE: the table below was printed by an earlier run that listed
# mom_mlu_intercept instead of mom_mlu_slope; re-run to refresh row 2.
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.06334250 0.01283598 0.20264410
## 2 mom_mlu_intercept 4.10643507 0.40644597 0.09897782
## 3 mom_mtld_slope 14.80122629 2.57400445 0.17390481
## 4 mom_ttr_slope 0.04688715 0.05098465 1.08739070
## 5 mom_vocd_slope 0.56419163 0.23421548 0.41513463

# CHILDES: same summary for the mother slopes.
childes_intercept %>%
  ungroup() %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.05056424 0.03349923 0.6625082
## 2 mom_mlu_slope 0.58665441 0.58185294 0.9918155
## 3 mom_mtld_slope 10.82447528 5.96172968 0.5507638
## 4 mom_ttr_slope 0.03423628 0.04476633 1.3075696
## 5 mom_vocd_slope 0.67090861 0.80378108 1.1980485
# Histograms of the mothers' slopes, one panel per measure, each with its
# own x range (LDP first, then CHILDES).
ldp_intercept %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  gather(
    key = "measure", value = "value",
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  ggplot() +
  geom_histogram(aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# correlation plot of child's intercept
# LDP: rows with an NA in any column of the table are dropped first (the
# complete.cases() filter runs before select()), then the child intercept
# columns are correlated and drawn as the upper triangle.
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")

# CHILDES: same steps, with the coefficients printed as numbers.
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "number")
# correlation plot of mother's intercept
# LDP, then CHILDES: keep rows that are complete across the whole table,
# correlate the mother intercept columns and print the coefficients in the
# upper triangle.
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "number")

childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "number")
# correlation plot of child's slope
# LDP, then CHILDES: keep rows that are complete across the whole table,
# correlate the child slope columns and draw the upper triangle.
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, sen_slope, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")

childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "number")
# correlation plot of mother's slope
# LDP, then CHILDES: keep rows that are complete across the whole table,
# correlate the mother slope columns and print the coefficients in the
# upper triangle. Both data sets use the same five slopes (including
# mom_mlu_slope) so the two plots are directly comparable.
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# plot all parameters of children
# LDP: correlations among every child intercept and slope, computed on rows
# that are complete across the whole table.
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept, ppvt_intercept, mtld_intercept, mattr_intercept,
    vocd_intercept, ttr_intercept, mlu_intercept, sen_intercept,
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope, vocd_slope,
    ttr_slope, mlu_slope, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")

# CHILDES: same plot for the child parameters available there.
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope,
    mlu_intercept, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")
# plot all parameters of mothers
# LDP, then CHILDES: correlations among every mother intercept and slope,
# computed on rows that are complete across the whole table.
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept, mom_vocd_intercept,
    mom_ttr_intercept, mom_mlu_intercept, mom_mtld_slope,
    mom_mattr_slope, mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")

childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope,
    mom_mlu_intercept, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")
# plot parameters of child and mother
# LDP: one correlation matrix covering the mother parameters followed by the
# child parameters, computed on rows that are complete across the whole
# table.
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept, mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope,
    mtld_intercept, mtld_slope,
    mattr_intercept, mattr_slope,
    vocd_intercept, vocd_slope,
    ttr_intercept, ttr_slope,
    mlu_intercept, mlu_slope,
    sen_intercept, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")

# CHILDES: same plot for the parameters available there.
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope,
    mom_mlu_intercept, mom_mlu_slope,
    mtld_intercept, mtld_slope,
    mattr_intercept, mattr_slope,
    vocd_intercept, vocd_slope,
    ttr_intercept, ttr_slope,
    mlu_intercept, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper", method = "square")